source("custom_functions.R")Mucosal microbiota alterations in primary sclerosis cholangitis persist after liver transplantation and are associated with clinical features independently of geography
3. Hypothesis: PSC disease is associated with IBD
1 Introduction
1.1 About
An IBD vs. no-IBD comparison was performed within PSC patients (pre-LTx and rPSC individuals combined) to discover IBD-specific pattern.
IBD vs no_IBD analysis on merged data
Alpha diversity –> group effect, country effect, interaction effect
Beta diversity – PERMANOVA, PCA -> group effect, country effect, interaction effect
DAA ->
o Group effect – linDA + MaAsLin2 intersection
o Country effect – – taxa with significant interaction effect were excluded based on individual post-hoc analysis. Taxa had to have the same direction in both countries
IBD vs no_IBD – finding IBD-specific pattern in PSC patients
Importing libraries and custom functions built for this analysis
1.2 Data Import
Importing ASV, taxa and metadata tables for both Czech and Norway samples.
Czech
MICROBIOME
path = "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
check.names = FALSE))IBD info
path = "../../data/clinical_data/"
cz <- read.csv(file.path(path,"clinical_metadata_cz.csv")) %>% dplyr::select(SampleID,PatientID,PSC_IBD,Group)
cz$PatientID <- as.character(cz$PatientID)
metadata_ikem <- merge(metadata_ikem,cz,by=c("SampleID", "Group"),all=TRUE)Norway
MICROBIOME
path = "../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
check.names = FALSE))IBD info
path = "../../data/clinical_data/"
no <- read.csv(file.path(path,"clinical_metadata_no.csv")) %>% dplyr::select(SampleID,subjectid,IBD, Group)1.2.1 Merging
Merging clinical metadata
clinical_metadata <- bind_rows(
cz %>% dplyr::mutate(PSC_IBD = case_when(
PSC_IBD == "0" ~ "no_ibd",
PSC_IBD == "1" ~ "ibd",
)) %>% `colnames<-`(c("SampleID","PatientID","IBD","Group")),
no %>% `colnames<-`(c("SampleID","PatientID","IBD","Group")) %>%
mutate(PatientID=paste0("NO_",PatientID)))Terminal ileum
ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
asv_tab_2=asv_tab_norway,
taxa_tab_1=taxa_tab_ikem,
taxa_tab_2=taxa_tab_norway,
metadata_1=metadata_ikem,
metadata_2=metadata_norway,
segment="TI",Q="Q3")Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 979 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]
ileum_metadata <- merge(ileum_metadata,
clinical_metadata %>%
dplyr::select(-Group),
by=c("SampleID"), all.x = TRUE) %>%
dplyr::select(-PatientID,Group) %>%
dplyr::mutate(Group=IBD) %>% dplyr::select(-IBD)Colon
colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
asv_tab_2=asv_tab_norway,
taxa_tab_1=taxa_tab_ikem,
taxa_tab_2=taxa_tab_norway,
metadata_1=metadata_ikem,
metadata_2=metadata_norway,
segment="colon",Q="Q3")Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 1157 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]
colon_metadata <- merge(colon_metadata,
clinical_metadata %>%
dplyr::select(-Group),
by=c("SampleID"), all.x = TRUE) %>%
dplyr::select(-PatientID,Group) %>%
dplyr::mutate(Group=IBD) %>% dplyr::select(-IBD)1.3 Import clinical data
1.3.1 CZ
# clinical metadata
metadata_clinical <- read.csv("../../data/clinical_data/clinical_metadata_cz.csv")
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)
# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../data/clinical_data/dysbiosis_metadata.csv") %>%
dplyr::filter(Country=="CZ")
# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
"../results/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
dplyr::filter(Country=="CZ")
metadata_alpha_colon <- read.csv(
"../results/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
dplyr::filter(Country=="CZ")
metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
mutate(PatientID=Patient) %>%
dplyr::select(-c(Patient, Group))
# MERGING
metadata_cz <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))
metadata_cz <- full_join(metadata_cz, metadata_alpha, by=c("SampleID","PatientID","Country"))
metadata_cz$Group <- factor(metadata_cz$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))1.3.2 NO
# clinical metadata
metadata_clinical <- read.csv("../../data/clinical_data/clinical_metadata_no.csv")
metadata_clinical %<>% mutate(PatientID=subjectid,
Matrix=segment) %>%
dplyr::select(-subjectid,-segment)
metadata_clinical$PatientID <- as.character(paste0("NO_",metadata_clinical$PatientID))
# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../data/clinical_data/dysbiosis_metadata.csv") %>%
dplyr::filter(Country=="NO") %>%
dplyr::select(-c(Patient))
# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
"../results/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
dplyr::filter(Country=="NO")
metadata_alpha_colon <- read.csv(
"../results/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
dplyr::filter(Country=="NO")
metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
mutate(PatientID=Patient) %>%
dplyr::select(-c(Patient, Group))
# MERGING
metadata_no <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))
metadata_no <- full_join(metadata_no, metadata_alpha, by=c("SampleID","PatientID","Country"))
metadata_no$Group <- factor(metadata_no$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))1.3.3 Merging metadata
metadata_cz %<>% dplyr::mutate(Calprotectin=Fecal.calprotectin,
AOM=AOM_score,
APRI=APRI_score,
FIB4=FIB4_score) %>%
dplyr::select(SampleID,Matrix,PatientID,Group,Country,Bilirubin,ALP,Calprotectin,
MAYO_PSC,AOM,APRI,FIB4,Platelets,AST,Creatinine,
Albumin,ALT,PSC_IBD,GGT,INR,Albumin,Nancy_max,eMAYO,MAYO_dai,
dys_unfiltered_asv,dys_unfiltered_genus,
dys_filtered_asv,dys_filtered_genus,
Observe,Shannon,Simpson,Pielou)
metadata_no %<>% dplyr::mutate(Platelets=TRC,
Creatinine=Kreatinin,
MAYO_PSC=Mayo_score,
AST=ASAT/60,
ALT=ALAT/60,
PSC_IBD=IBD,
Bilirubin=Bilirubin*17.1,
ALP=ALP/60,
Albumin=Albumin*10) %>%
dplyr::mutate(PSC_IBD = case_when(
PSC_IBD == "no_ibd" ~ "0",
PSC_IBD == "ibd" ~ "1",
TRUE ~ Group # Keep other values as is
)) %>%
dplyr::select(SampleID,Matrix,PatientID,Group,Country,Bilirubin,ALP,Calprotectin,
MAYO_PSC,AOM,APRI,FIB4,Platelets,AST,Creatinine,
Albumin,ALT,PSC_IBD,
dys_unfiltered_asv,dys_unfiltered_genus,
dys_filtered_asv,dys_filtered_genus,
Observe,Shannon,Simpson,Pielou)
metadata_final <- merge(metadata_cz,metadata_no,all = TRUE)
metadata_final$Calprotectin[metadata_final$Calprotectin==">6000"] <- 6000
metadata_final$Calprotectin <- as.numeric(metadata_final$Calprotectin)1.4 CHI SQUARE TEST
1.4.1 pre_LTx, rPSC, non-rPSC
cz <- cz %>% dplyr::group_by(PSC_IBD,Group) %>%
distinct(PatientID, .keep_all = TRUE) %>%
count() %>% drop_na() %>%
dplyr::mutate(PSC_IBD = case_when(
PSC_IBD == "0" ~ "no_ibd",
PSC_IBD == "1" ~ "ibd",
)) %>% `colnames<-`(c("IBD","Group","n"))
no <- no %>% dplyr::group_by(IBD,Group) %>%
distinct(subjectid, .keep_all = TRUE) %>%
count() %>% drop_na()
# Summarize by IBD and Group
data <- bind_rows(cz, no) %>%
group_by(IBD, Group) %>%
summarise(n = sum(n), .groups = "drop") %>%
pivot_wider(names_from = Group, values_from = n) %>%
column_to_rownames("IBD") %>%
as.matrix()
chi_res <- chisq.test(data)
chi_res$expected non-rPSC pre_ltx rPSC
ibd 101.12774 96.11314 31.759124
no_ibd 19.87226 18.88686 6.240876
chi_res
Pearson's Chi-squared test
data: data
X-squared = 3.881, df = 2, p-value = 0.1436
1.4.2 pre_LTx + rPSC (PSC patients), non-rPSC
data2 <- data %>% as.data.frame() %>%
dplyr::mutate(PSC=pre_ltx+rPSC) %>%
dplyr::select(PSC,`non-rPSC`) %>% as.matrix()
chi_res <- chisq.test(data2)
chi_res$expected PSC non-rPSC
ibd 127.87226 101.12774
no_ibd 25.12774 19.87226
chi_res
Pearson's Chi-squared test with Yates' continuity correction
data: data2
X-squared = 0.20305, df = 1, p-value = 0.6523
1.4.3 rPSC, non-rPSC
data3 <- data %>% as.data.frame() %>%
dplyr::select(rPSC,`non-rPSC`) %>% as.matrix()
chi_res <- chisq.test(data3)
chi_res$expected rPSC non-rPSC
ibd 32.981132 105.01887
no_ibd 5.018868 15.98113
chi_res
Pearson's Chi-squared test with Yates' continuity correction
data: data3
X-squared = 0.69593, df = 1, p-value = 0.4042
2 Data Analysis - Terminal ileum
segment="terminal_ileum"2.1 Filtering
Rules:
sequencing depth > 10000
nearZeroVar() with default settings
Rarefaction Curve
path="../intermediate_files/rarecurves"
seq_depth_threshold <- 10000ps <- construct_phyloseq(ileum_asv_tab,ileum_taxa_tab,ileum_metadata)
rareres <- get_rarecurve(obj=ps, chunks=500)
save(rareres,file = file.path(path,"rarefaction_ileum.Rdata"))load(file.path(path,"rarefaction_ileum.Rdata"))
prare <- ggrarecurve(obj=rareres,
factorNames="Country",
indexNames=c("Observe")) +
theme_bw() +
theme(axis.text=element_text(size=8), panel.grid=element_blank(),
strip.background = element_rect(colour=NA,fill="grey"),
strip.text.x = element_text(face="bold")) +
geom_vline(xintercept = seq_depth_threshold,
linetype="dashed",
color = "red") +
xlim(0, 20000)The color has been set automatically, you can reset it manually by adding scale_color_manual(values=yourcolors)
prareLibrary size
read_counts(ileum_asv_tab, line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
2.1.1 Sequencing depth
data_filt <- seq_depth_filtering(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
seq_depth_threshold = 10000)Removing 68 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata
seq_step <- dim(filt_ileum_asv_tab)[1]Library size
read_counts(filt_ileum_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
2.1.2 NearZeroVar
data_filt <- nearzerovar_filtering(filt_ileum_asv_tab,
filt_ileum_taxa_tab,
filt_ileum_metadata)
filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]Library size
read_counts(filt_ileum_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
2.1.3 Final Counts
final_counts_filtering(ileum_asv_tab,
filt_ileum_asv_tab,
filt_ileum_metadata,
seq_step, 0, nearzero_step) %>% `colnames<-`("Count") Count
Raw data: ASVs 4153
Raw data: Samples 214
Sequencing depth filt: ASVs 4085
Prevalence filt: ASVs 0
NearZeroVar filt: ASVs 449
Filt data: ASVs 449
Filt data: Samples 208
Filt data: Patients 208
Filt data: Patients.1 0
Filtered samples 6
Matrices TI
healthy 0
non-rPSC 0
rPSC 0
pre_ltx 0
post_ltx 0
ETOH 0
2.2 Alpha diversity
path = "../results/Q3/alpha_diversity"Calculation
# Construct MPSE object
alpha_ileum_metadata$Sample <- alpha_ileum_metadata$SampleID
ileum_mpse <- as.MPSE(construct_phyloseq(alpha_ileum_asv_tab,
alpha_ileum_taxa_tab,
alpha_ileum_metadata))
ileum_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)
# Calculate alpha diversity - rarefied counts
ileum_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)alpha_div_plots <- list()
# preparing data frame
alpha_data <- data.frame(SampleID=ileum_mpse$Sample.x,
Observe=ileum_mpse$Observe,
Shannon=ileum_mpse$Shannon,
Simpson=ileum_mpse$Simpson,
Pielou=ileum_mpse$Pielou,
Group=ileum_mpse$Group,
Country=ileum_mpse$Country,
Patient=ileum_mpse$Patient)
write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
row.names = FALSE)2.2.1 Plots
p_boxplot_alpha <- alpha_diversity_countries(alpha_data,show_legend = FALSE)Using SampleID, Group, Country, Patient as id variables
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha
# see the results
p_boxplot_alpha2.2.2 Linear Model
path = "../results/Q3/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))Richness
results_model <- pairwise.lm(formula = "Observe ~ Group * Country",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_observe <- results_model[[1]]
results_model_observe_emeans <- results_model[[2]]
} else {
results_model_observe <- results_model
results_model_observe_emeans <- NA
}
# save the results
pc_observed <- list();
pc_observed[[segment]] <- results_model_observe# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -5.135 | 13.890 | -0.370 | 0.712 |
| no_ibd , ibd - CZ vs NO | 0.000 | 17.652 | 0.000 | 1.000 |
| no_ibd vs Groupibd:CountryNO | -11.094 | 19.350 | -0.573 | 0.567 |
knitr::kable(results_model_observe_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Shannon
results_model <- pairwise.lm(formula = "Shannon ~ Group * Country",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_shannon <- results_model[[1]]
results_model_shannon_emeans <- results_model[[2]]
} else {
results_model_shannon <- results_model
results_model_shannon_emeans <- NA
}
# save the results
pc_shannon <- list();
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | 0.034 | 0.182 | 0.187 | 0.852 |
| no_ibd , ibd - CZ vs NO | -0.070 | 0.231 | -0.302 | 0.763 |
| no_ibd vs Groupibd:CountryNO | -0.171 | 0.254 | -0.676 | 0.500 |
knitr::kable(results_model_shannon_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Simpson
results_model <- pairwise.lm(formula = "Simpson ~ Group * Country",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_simpson <- results_model[[1]]
results_model_simpson_emeans <- results_model[[2]]
} else {
results_model_simpson <- results_model
results_model_simpson_emeans <- NA
}
# save the results
pc_simpson <- list();
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | 0.010 | 0.026 | 0.401 | 0.689 |
| no_ibd , ibd - CZ vs NO | 0.004 | 0.033 | 0.125 | 0.900 |
| no_ibd vs Groupibd:CountryNO | -0.031 | 0.036 | -0.853 | 0.395 |
knitr::kable(results_model_simpson_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Pielou
results_model <- pairwise.lm(formula = "Pielou ~ Group * Country",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_pielou <- results_model[[1]]
results_model_pielou_emeans <- results_model[[2]]
} else {
results_model_pielou <- results_model
results_model_pielou_emeans <- NA
}
# save the results
pc_pielou <- list();
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | 0.016 | 0.027 | 0.593 | 0.554 |
| no_ibd , ibd - CZ vs NO | -0.014 | 0.034 | -0.419 | 0.676 |
| no_ibd vs Groupibd:CountryNO | -0.025 | 0.037 | -0.674 | 0.501 |
knitr::kable(results_model_pielou_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
2.2.3 Saving results
alpha_list <- list(
Richness=pc_observed[[segment]],
Shannon=pc_shannon[[segment]])
write.xlsx(alpha_list,
file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))2.3 Beta diversity
Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.
2.3.1 Main analysis
Genus level, Aitchison distance
level="genus"path = "../results/Q3/beta_diversity"pairwise_aitchison_raw <- list()
pca_plots_list <- list()Aggregation, filtering
# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level=level,
names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
genus_data[[2]],
ileum_metadata,
seq_depth_threshold=10000)Removing 6 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]2.3.1.0.1 PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,
filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country, sim.method = "robust.aitchison", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "robust.aitchison", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 165.293 | 0.975 | 0.005 | 0.496 | 0.496 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 609.984 | 3.599 | 0.017 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 185.468 | 1.095 | 0.005 | 0.259 | 0.259 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country,
group1 = group1,
group2 = group2)
print(result_list)
}
}2.3.1.0.2 Plots
PCA
p <- pca_plot_custom(filt_ileum_genus_tab,
filt_ileum_genus_taxa,
filt_ileum_metadata,
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=FALSE)
# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p
# see the results
p2.3.1.1 PCA + correlations
variables= c("Calprotectin","Nancy_max","eMAYO","MAYO_dai")metadata_clinical <- metadata_final
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)
metadata_ileum_pca <- merge(filt_ileum_metadata,metadata_clinical[,c("SampleID",variables)],by="SampleID",all.x=TRUE) %>% dplyr::select(SampleID, Group, Country,variables) Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(variables)
# Now:
data %>% select(all_of(variables))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
res <- pca_plot_corr(filt_ileum_genus_tab,
filt_ileum_genus_taxa,
filt_ileum_metadata,
show_boxplots = FALSE,
variable = "Group", size=3,
segment=segment,
show_legend= TRUE,
clinical = TRUE, clinical_metadata = metadata_ileum_pca,
variables= c("Calprotectin","Nancy_max","eMAYO","MAYO_dai"))
res[[1]]
variable r p p.adj
Calprotectin Calprotectin 0.12134529 0.1027229 0.4108915
Nancy_max Nancy_max 0.02160555 0.8285032 0.8285032
eMAYO eMAYO 0.08919279 0.3702805 0.4937074
MAYO_dai MAYO_dai 0.10595651 0.2867767 0.4937074
[[2]]
variable r p p.adj
Calprotectin Calprotectin 0.02112069 0.7771752 0.7771752
Nancy_max Nancy_max 0.03027382 0.7614588 0.7771752
eMAYO eMAYO 0.04867828 0.6253437 0.7771752
MAYO_dai MAYO_dai 0.08530684 0.3915737 0.7771752
[[3]]
r p p_adjusted
Barnesiella -0.28802148 2.660865e-05 3.628452e-05
Butyricimonas 0.04762419 4.942348e-01 4.942348e-01
Odoribacter -0.16878430 1.489364e-02 1.595747e-02
Alistipes -0.52423416 0.000000e+00 0.000000e+00
Parabacteroides -0.29219274 2.010965e-05 3.016447e-05
Enterococcus 0.58191580 0.000000e+00 0.000000e+00
Coprococcus -0.36086396 1.060335e-07 2.053779e-07
Fusicatenibacter -0.36520459 7.295307e-08 2.053779e-07
Lachnoclostridium 0.21660586 1.711591e-03 2.139489e-03
Lachnospiraceae_FCS020_group -0.29541587 1.614984e-05 2.691641e-05
Oscillibacter -0.20365066 3.224972e-03 3.721122e-03
Faecalibacterium -0.36187478 9.723698e-08 2.053779e-07
Pseudomonas 0.50726238 0.000000e+00 0.000000e+00
Enterorhabdus 0.49231489 0.000000e+00 0.000000e+00
Holdemania 0.36048391 1.095349e-07 2.053779e-07
[[4]]
r p p_adjusted
Barnesiella -0.230021123 8.548387e-04 2.988334e-03
Butyricimonas -0.216541849 1.717109e-03 3.679520e-03
Odoribacter -0.172090114 1.302112e-02 2.143583e-02
Alistipes -0.223637537 1.195333e-03 2.988334e-03
Parabacteroides -0.117146202 9.195283e-02 1.253902e-01
Enterococcus -0.060688739 3.835622e-01 4.794528e-01
Coprococcus 0.308484422 6.465693e-06 3.232847e-05
Fusicatenibacter 0.224160279 1.163353e-03 2.988334e-03
Lachnoclostridium 0.455714956 4.924212e-12 7.386319e-11
Lachnospiraceae_FCS020_group -0.003417826 9.608962e-01 9.608962e-01
Oscillibacter -0.169807119 1.429055e-02 2.143583e-02
Faecalibacterium 0.353934967 1.905132e-07 1.428849e-06
Pseudomonas 0.190446091 5.929778e-03 1.111833e-02
Enterorhabdus -0.038188166 5.836507e-01 6.253401e-01
Holdemania -0.043982333 5.278616e-01 6.090710e-01
heatmap
df_heatmap <- melt(res[[4]] %>% rownames_to_column("SeqID") %>%
mutate(PCo2=r) %>% select(SeqID,PCo2))Using SeqID as id variables
labels_df <- melt(res[[4]] %>% rownames_to_column("SeqID") %>% select(SeqID,p_adjusted))Using SeqID as id variables
labels <- labels_df
labels$variable <- "PCo2"
labels$value <- NA
labels$value[labels_df$value<=0.001] <- "***"
labels$value[labels_df$value<=0.01] <- "**"
labels$value[labels_df$value<=0.05] <- "*"
p_mdi_ileum <- ggplot() +
geom_tile(data=df_heatmap, aes(variable, SeqID, fill= value)) +
theme_minimal() +
scale_fill_gradient2(name = "Rho", low = "blue", mid = "white", high = "red", midpoint = 0,limits = c(-0.7, 0.7)) +
geom_text(data=labels,aes(x=variable,y=SeqID,label=value,vjust=0.6)) +
xlab("") + ylab("")
p_mdi_ileumWarning: Removed 5 rows containing missing values or values outside the scale range
(`geom_text()`).
2.3.1.2 Saving results
write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]],
file = file.path(path,
paste0("beta_diversity_results_", segment,".xlsx")))2.3.2 Supplementary analysis
supplements_beta <- list()2.3.2.1 Genus level
level="genus"2.3.2.1.1 Bray-Curtis
PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "bray", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "bray", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("bray",level,segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 0.198 | 0.893 | 0.004 | 0.581 | 0.581 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 1.523 | 6.858 | 0.032 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 0.262 | 1.179 | 0.006 | 0.264 | 0.264 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country,
group1 = group1,
group2 = group2,
sim.method = 'bray')
print(result_list)
}
}Plots
p <- pca_plot_custom(filt_ileum_genus_tab,
filt_ileum_genus_taxa,
filt_ileum_metadata,
measure = "bray",
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p
# see the results
p2.3.2.1.2 Jaccard
PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "jaccard", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "jaccard", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- rbind(pp_factor, pp_cov, pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 0.267 | 0.87 | 0.004 | 0.704 | 0.704 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 1.569 | 5.123 | 0.024 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 0.344 | 1.123 | 0.005 | 0.26 | 0.26 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country,
group1 = group1,
group2 = group2,
sim.method = 'jaccard')
print(result_list)
}
}Plots
p <- pca_plot_custom(filt_ileum_genus_tab,
filt_ileum_genus_taxa,
filt_ileum_metadata,
measure = "jaccard",
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p
# see the results
p2.3.2.2 ASV level
level="ASV"2.3.2.2.1 Aitchison
PERMANOVA
# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "robust.aitchison", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "robust.aitchison", p.adjust.m="BH")
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("aitchison",level,segment)]] <- rbind(pp_factor,
pp_cov,
pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 252.554 | 1.038 | 0.005 | 0.327 | 0.327 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 600.047 | 2.467 | 0.012 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 331.978 | 1.367 | 0.007 | 0.019 | 0.019 | * |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country,
group1 = group1,
group2 = group2)
print(result_list)
}
}$no_ibd_ibd_CZ
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999
adonis2(formula = x1 ~ Fac, data = x2, permutations = perm)
Df SumOfSqs R2 F Pr(>F) p.adjusted
Fac 1 322.6 0.01055 1.3221 0.031 0.041333
Residual 124 30261.4 0.98945
Total 125 30584.1 1.00000
$no_ibd_ibd_NO
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999
adonis2(formula = x1 ~ Fac, data = x2, permutations = perm)
Df SumOfSqs R2 F Pr(>F) p.adjusted
Fac 1 261.9 0.0134 1.0867 0.225 0.225
Residual 80 19278.7 0.9866
Total 81 19540.6 1.0000
$no_ibd_CZ_vs_NO
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999
adonis2(formula = x1 ~ Cov, data = x2, permutations = perm)
Df SumOfSqs R2 F Pr(>F) p.adjusted
Cov 1 381.9 0.0475 1.5458 0.005 0.01
Residual 31 7658.1 0.9525
Total 32 8040.0 1.0000
$ibd_CZ_vs_NO
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999
adonis2(formula = x1 ~ Cov, data = x2, permutations = perm)
Df SumOfSqs R2 F Pr(>F) p.adjusted
Cov 1 550 0.01297 2.2725 0.001 0.004
Residual 173 41882 0.98703
Total 174 42432 1.00000
PCoA
p <- pca_plot_custom(filt_ileum_asv_tab,
filt_ileum_taxa_tab,
filt_ileum_metadata,
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA aitchison",level,segment)]] <- p
# see the results
p2.3.2.2.2 Bray-Curtis
PERMANOVA
# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "bray", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "bray", p.adjust.m="BH")
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("bray",level,segment)]] <- rbind(pp_factor,
pp_cov,
pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 0.321 | 0.952 | 0.005 | 0.573 | 0.573 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 1.545 | 4.585 | 0.022 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 0.489 | 1.455 | 0.007 | 0.053 | 0.053 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country,
group1 = group1,
group2 = group2,
sim.method = 'bray')
print(result_list)
}
}PCoA
p <- pca_plot_custom(filt_ileum_asv_tab,
filt_ileum_taxa_tab,
filt_ileum_metadata,
measure = "bray",
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p
# see the results
p2.3.2.2.3 Jaccard
PERMANOVA
# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "jaccard", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "jaccard", p.adjust.m="BH")
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- rbind(pp_factor,
pp_cov,
pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 0.381 | 0.95 | 0.005 | 0.637 | 0.637 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 1.278 | 3.185 | 0.015 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 0.514 | 1.284 | 0.006 | 0.051 | 0.051 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country,
group1 = group1,
group2 = group2,
sim.method = 'jaccard')
print(result_list)
}
}PCoA
p <- pca_plot_custom(filt_ileum_asv_tab,
filt_ileum_taxa_tab,
filt_ileum_metadata,
measure = "jaccard",
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p
# see the results
p2.3.2.3 Saving results
write.xlsx(supplements_beta[!grepl("PCoA",names(supplements_beta))],
file = file.path(path,
paste0("supplements_beta_diversity_", segment,".xlsx")))2.4 Univariate Analysis
2.4.1 Main analysis
level="genus"# needed paths
path = "../results/Q3/univariate_analysis"
path_maaslin=file.path("../intermediate_files/maaslin/Q3",level)# variables
raw_linda_results_genus <- list();
raw_linda_results_genus[[segment]] <- list()
linda_results_genus <- list();
linda_results_genus[[segment]] <- list()
# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()
# workbook for final df
wb <- createWorkbook()
# PSC effect
psc_effect <- list()Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
ileum_genus_asv_taxa_tab <- create_asv_taxa_table(ileum_genus_tab,
ileum_genus_taxa_tab)2.4.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])2.4.1.1.1 linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group, usage="linDA")Removing 6 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_ileum_uni_data,
filt_ileum_uni_metadata,
formula = '~ Group * Country')0 features are filtered!
The filtered data has 208 samples and 184 features will be tested!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO")
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")
for (grp in c(group1,group2,group3)){
raw_linda_results_genus[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
linda_results_genus[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
}# volcano plot
volcano_1 <- volcano_plot_linda(linda.output,
group1,
taxa_table = filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano_2 <- volcano_plot_linda(linda.output, group2,
taxa_table = filt_ileum_uni_taxa) +
ggtitle("NO vs CZ")
volcano_3 <- volcano_plot_linda(linda.output, group3,
taxa_table = filt_ileum_uni_taxa) +
ggtitle("Interaction effect")
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcano2.4.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") +
ggtitle("Country effect")
volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano2.4.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_genus,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
venn2.4.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
linda.output,
list_intersections,
filt_ileum_uni_data,
filt_ileum_uni_metadata,
segment=segment,
level=level)
# see the result
## significant interaction effect
list_interaction_significant[[1]]NULL
## results for czech cohort
list_interaction_significant[[2]][1] NA
## results for norwegian cohort
list_interaction_significant[[3]][1] NA
Removing problematic taxa
list_intersections <- removing_interaction_problems(group,
list_interaction_significant,
list_intersections,
segment=segment,
level=level)2.4.1.1.5 Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_genus[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)2.4.1.2 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
p_heatmap_linda
}Dotheatmap
if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
ileum_taxa_tab)
dotheatmap_linda
}Horizontal bar plot
if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))
p_prevalence_final <- ggarrange(p_prevalence,
ggplot() + theme_minimal(),
nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}2.4.1.3 Saving results
# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
overwrite = TRUE)
# PSC effect
write.xlsx(psc_effect[[paste(segment,level)]],file.path(path,paste0("psc_effect_",segment,".xlsx")))
# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
`names<-`(gsub(segment, "", names(
list_intersections[grepl(segment,names(list_intersections))]))),
file.path(path,paste0("significant_taxa_",segment,".xlsx")))2.4.2 Supplementary Analysis
supplements_uni <- list()
supplements_wb <- createWorkbook()2.4.2.1 ASV level
level="ASV"path_maaslin="../intermediate_files/maaslin/Q3/ASV/"raw_linda_results <- list();
raw_linda_results[[segment]] <- list()
linda_results <- list();
linda_results[[segment]] <- list()2.4.2.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])linDA
# prepare the data
linda_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="linDA")Removing 68 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_ileum_uni_data,
filt_ileum_uni_metadata,
formula = '~ Group * Country')0 features are filtered!
The filtered data has 208 samples and 449 features will be tested!
Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group * Country"): Some features have less than 3 nonzero values!
They have virtually no statistical power. You may consider filtering them in the analysis!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO")
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")
for (grp in c(group1,group2,group3)){
raw_linda_results[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
linda_results[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
}# volcano plot
volcano_1 <- volcano_plot_linda(linda.output,
group1,
taxa_table = filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano_2 <- volcano_plot_linda(linda.output, group2,
taxa_table = filt_ileum_uni_taxa) +
ggtitle("NO vs CZ")
volcano_3 <- volcano_plot_linda(linda.output, group3,
taxa_table = filt_ileum_uni_taxa) +
ggtitle("Interaction effect")
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcanoMaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") +
ggtitle("Country effect")
volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcanoGroup - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
vennInteraction effect
list_interaction_significant <- country_interaction(group,
linda.output,
list_intersections,
filt_ileum_uni_data,
filt_ileum_uni_metadata,
segment=segment,
level=level)
# see the result
## significant interaction effect
list_interaction_significant[[1]]NULL
## results for czech cohort
list_interaction_significant[[2]][1] NA
## results for norwegian cohort
list_interaction_significant[[3]][1] NA
Removing problematic taxa
list_intersections <- removing_interaction_problems(group,
list_interaction_significant,
list_intersections,
segment=segment,
level=level)Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)2.4.2.1.2 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
p_heatmap_linda
}Dot heatmap
if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
ileum_taxa_tab)
dotheatmap_linda
}Horizontal bar plot
if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))
p_prevalence_final <- ggarrange(p_prevalence,
ggplot() + theme_minimal(),
nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}2.4.2.2 Phylum level
level="phylum"path_maaslin="../intermediate_files/maaslin/Q3/Phylum/"raw_linda_results_phylum <- list();
raw_linda_results_phylum[[segment]] <- list()
linda_results_phylum <- list();
linda_results_phylum[[segment]] <- list()Aggregate taxa
phylum_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = "Phylum")
ileum_phylum_tab <- phylum_data[[1]]
ileum_phylum_taxa_tab <- phylum_data[[2]]2.4.2.2.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])linDA
# prepare the data
linda_data <- binomial_prep(ileum_phylum_tab,
ileum_phylum_taxa_tab,
ileum_metadata,
group, usage="linDA")
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_ileum_uni_data,
filt_ileum_uni_metadata,
formula = '~ Group * Country')0 features are filtered!
The filtered data has 208 samples and 10 features will be tested!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO")
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")
for (grp in c(group1,group2,group3)){
raw_linda_results_phylum[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
linda_results_phylum[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
}# volcano plot
volcano_1 <- volcano_plot_linda(linda.output,
group1,
taxa_table = filt_ileum_uni_taxa) +
ggtitle(comparison_title)Using Phylum for naming
volcano_2 <- volcano_plot_linda(linda.output, group2,
taxa_table = filt_ileum_uni_taxa) +
ggtitle("NO vs CZ")Using Phylum for naming
volcano_3 <- volcano_plot_linda(linda.output, group3,
taxa_table = filt_ileum_uni_taxa) +
ggtitle("Interaction effect")Using Phylum for naming
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcanoMaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") +
ggtitle("Country effect")
volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcanoGroup - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_phylum,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
vennInteraction effect
list_interaction_significant <- country_interaction(group,
linda.output,
list_intersections,
filt_ileum_uni_data,
filt_ileum_uni_metadata,
segment=segment,
level=level)
# see the result
## significant interaction effect
list_interaction_significant[[1]]NULL
## results for czech cohort
list_interaction_significant[[2]][1] NA
## results for norwegian cohort
list_interaction_significant[[3]][1] NA
Removing problematic taxa
list_intersections <- removing_interaction_problems(group,
list_interaction_significant,
list_intersections,
segment=segment,
level=level)Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_phylum[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)2.4.2.2.2 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
p_heatmap_linda
}Dot heatmap
if (length((list_heatmap[[1]][[1]]))>1){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
ileum_taxa_tab)
dotheatmap_linda
}2.4.2.3 Saving results
# ALL DATA
saveWorkbook(supplements_wb,file.path(path,paste0("supplements_uni_analysis_wb_",segment,".xlsx")),overwrite = TRUE)
# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
`names<-`(gsub(segment, "", names(
list_intersections[grepl(segment,names(list_intersections))]))),
file.path(path,paste0("supplements_significant_taxa_",segment,".xlsx")))2.5 Results overview
2.5.0.1 Alpha diversity
knitr::kable(pc_observed[[segment]],
digits = 3,
caption = "Results of linear model testing ASV Richness")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -5.135 | 13.890 | -0.370 | 0.712 |
| no_ibd , ibd - CZ vs NO | 0.000 | 17.652 | 0.000 | 1.000 |
| no_ibd vs Groupibd:CountryNO | -11.094 | 19.350 | -0.573 | 0.567 |
knitr::kable(pc_shannon[[segment]],
digits = 3,
caption = "Results of linear model testing Shannon index")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | 0.034 | 0.182 | 0.187 | 0.852 |
| no_ibd , ibd - CZ vs NO | -0.070 | 0.231 | -0.302 | 0.763 |
| no_ibd vs Groupibd:CountryNO | -0.171 | 0.254 | -0.676 | 0.500 |
knitr::kable(pc_simpson[[segment]],
digits = 3,
caption = "Results of linear model testing Simpson index")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | 0.010 | 0.026 | 0.401 | 0.689 |
| no_ibd , ibd - CZ vs NO | 0.004 | 0.033 | 0.125 | 0.900 |
| no_ibd vs Groupibd:CountryNO | -0.031 | 0.036 | -0.853 | 0.395 |
knitr::kable(pc_pielou[[segment]],
digits = 3,
caption = "Results of linear model testing Pielou index")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | 0.016 | 0.027 | 0.593 | 0.554 |
| no_ibd , ibd - CZ vs NO | -0.014 | 0.034 | -0.419 | 0.676 |
| no_ibd vs Groupibd:CountryNO | -0.025 | 0.037 | -0.674 | 0.501 |
Plots
alpha_div_plots[[paste(segment,"Country")]]2.5.0.2 Beta diversity
Main results
knitr::kable(pairwise_aitchison_raw[[paste("genus", segment)]],
digits = 3,
caption = "Results of PERMANOVA - robust aitchison distance")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 165.293 | 0.975 | 0.005 | 0.496 | 0.496 | |
| no_ibd vs ibd , Country | 1 | 609.984 | 3.599 | 0.017 | 0.001 | 0.001 | *** |
| no_ibd vs ibd : Country | 1 | 185.468 | 1.095 | 0.005 | 0.259 | 0.259 |
PCA
pca_plots_list[[paste(segment,"genus custom")]]Supplements
knitr::kable(supplements_beta[!grepl("PCoA",names(supplements_beta)) & (grepl("genus",names(supplements_beta)))],
digits = 3,
caption = "Supplementary PERMANOVA results: Bray-curtis, Jaccard distances")
|
|
PCA
ggarrange(plotlist = supplements_beta[grepl("PCoA",names(supplements_beta))],
labels=names(supplements_beta[grepl("PCoA",names(supplements_beta))]),
font.label = list(size=5,face="plain"),
ncol=2,nrow=3)2.5.0.3 Univariate analysis
Number of significant taxa
knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>%
`colnames<-`("Count") %>%
`rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")| Count | |
|---|---|
| terminal_ileum genus no_ibd vs ibd | 0 |
| terminal_ileum ASV no_ibd vs ibd | 0 |
| terminal_ileum phylum no_ibd vs ibd | 0 |
3 Data Analysis - Colon
segment="colon"3.1 Filtering
Rules: - prevalence > 5% (per group) - nearZeroVar with default settings - sequencing depth > 5000 - taxonomic assignment at least order
Library size
read_counts(colon_asv_tab, line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.1 Sequencing depth
data_filt <- seq_depth_filtering(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
seq_depth_threshold = 10000)Removing 52 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata
seq_step <- dim(filt_colon_asv_tab)[1]Library size
read_counts(filt_colon_asv_tab,line = c(10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.2 NearZeroVar
data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata)
filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]Library size
read_counts(filt_colon_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Check zero depth
data_filt <- check_zero_depth(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata)
filt_colon_asv_tab <- data_filt[[1]];
filt_colon_taxa_tab <- data_filt[[2]];
filt_colon_metadata <- data_filt[[3]]; Library size
read_counts(filt_colon_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.3 Final Counts
final_counts_filtering(colon_asv_tab,
filt_colon_asv_tab,
filt_colon_metadata,
seq_step, 0, nearzero_step) V1
Raw data: ASVs 5779
Raw data: Samples 616
Sequencing depth filt: ASVs 5727
Prevalence filt: ASVs 0
NearZeroVar filt: ASVs 306
Filt data: ASVs 306
Filt data: Samples 599
Filt data: Patients 263
Filt data: Patients.1 0
Filtered samples 17
Matrices CA;CD;SI;Cecum;Rectum
healthy 0
non-rPSC 0
rPSC 0
pre_ltx 0
post_ltx 0
ETOH 0
3.2 Alpha diversity
path = "../results/Q3/alpha_diversity"Calculation
# Construct MPSE object
alpha_colon_metadata$Sample <- alpha_colon_metadata$SampleID
colon_mpse <- as.MPSE(construct_phyloseq(alpha_colon_asv_tab,
alpha_colon_taxa_tab,
alpha_colon_metadata))
colon_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)
# Calculate alpha diversity - rarefied counts
colon_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)alpha_data <- data.frame(SampleID=colon_mpse$Sample.x,
Observe=colon_mpse$Observe,
Shannon=colon_mpse$Shannon,
Simpson=colon_mpse$Simpson,
Pielou=colon_mpse$Pielou,
Group=colon_mpse$Group,
Country=colon_mpse$Country,
Patient=colon_mpse$Patient)
write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
row.names = FALSE)3.2.1 Plots
p_boxplot_alpha <- alpha_diversity_countries(alpha_data)Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha
# see the results
p_boxplot_alpha3.2.2 Linear Model
path = "../results/Q3/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))Richness
results_model <- pairwise.lmer(
formula = "Observe ~ Group * Country + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_observe <- results_model[[1]]
results_model_observe_detailed <- results_model[[2]]
} else {
results_model_observe <- results_model
results_model_observe_detailed <- NA
}
# save the results
pc_observed[[segment]] <- results_model_observe# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | -2.810 | 11.952 | 439.998 | -0.235 | 0.814 | 0.814 | |
| no_ibd vs ibd - CZ vs NO | -10.684 | 15.292 | 332.353 | -0.699 | 0.485 | 0.485 | |
| no_ibd vs Groupibd:CountryNO | -5.680 | 16.562 | 341.720 | -0.343 | 0.732 | 0.732 |
knitr::kable(results_model_observe_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Shannon
results_model <- pairwise.lmer(
formula = "Shannon ~ Group * Country + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_shannon <- results_model[[1]]
results_model_shannon_detailed <- results_model[[2]]
} else {
results_model_shannon <- results_model
results_model_shannon_detailed <- NA
}
# save the results
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | 0.033 | 0.158 | 456.049 | 0.211 | 0.833 | 0.833 | |
| no_ibd vs ibd - CZ vs NO | -0.138 | 0.203 | 336.238 | -0.678 | 0.498 | 0.498 | |
| no_ibd vs Groupibd:CountryNO | -0.119 | 0.220 | 346.763 | -0.540 | 0.589 | 0.589 |
knitr::kable(results_model_shannon_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Simpson
results_model <- pairwise.lmer(
formula = "Simpson ~ Group * Country + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_simpson <- results_model[[1]]
results_model_simpson_detailed <- results_model[[2]]
} else {
results_model_simpson <- results_model
results_model_simpson_detailed <- NA
}
# save the results
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | -0.007 | 0.026 | 453.407 | -0.261 | 0.794 | 0.794 | |
| no_ibd vs ibd - CZ vs NO | -0.025 | 0.033 | 334.532 | -0.744 | 0.457 | 0.457 | |
| no_ibd vs Groupibd:CountryNO | 0.005 | 0.036 | 344.950 | 0.131 | 0.896 | 0.896 |
knitr::kable(results_model_simpson_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
3.2.3 Saving results
alpha_list <- list(
Richness=pc_observed[[segment]] %>% rownames_to_column("Comparison"),
Shannon=pc_shannon[[segment]] %>% rownames_to_column("Comparison"),
Simpson=pc_simpson[[segment]] %>% rownames_to_column("Comparison"))
write.xlsx(alpha_list,
file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))3.3 Beta diversity
Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.
3.3.1 Main analysis - Genus, Aitchison
Genus level, Aitchison distance
level="genus"path = "../results/Q3/beta_diversity"Aggregation, filtering
genus_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level=level,
names=TRUE)
filt_data <- filtering_steps(genus_data[[1]],
genus_data[[2]],
colon_metadata,
seq_depth_threshold=10000)Removing 10 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_genus_metadata <- filt_data[[3]]3.3.1.0.1 PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
patients = filt_colon_genus_metadata$Patient,
sim.method = "robust.aitchison", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
interaction = TRUE,
patients = filt_colon_genus_metadata$Patient,
sim.method = "robust.aitchison", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <-rbind(pp_factor,pp_cov,pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 172.58 | 1.118 | 0.002 | 1 | 1 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 1345.829 | 8.716 | 0.014 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 171.139 | 1.109 | 0.002 | 1 | 1 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
group1 = group1,
group2 = group2,
patients = filt_colon_genus_metadata$Patient)
print(result_list)
}
}3.3.1.0.2 Plots
PCoA custom
p <- pca_plot_custom(filt_colon_genus_tab,
filt_colon_genus_taxa,
filt_colon_genus_metadata,
show_boxplots = TRUE,
variable = "Group", size=2,
show_legend=FALSE)
# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p
# see the results
p3.3.1.1 PCA + correlations
variables= c("Calprotectin","Nancy_max","eMAYO","MAYO_dai")metadata_clinical <- metadata_final
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)
metadata_colon_pca <- merge(filt_colon_metadata %>% dplyr::mutate(PatientID=Patient),metadata_clinical[,c("SampleID",variables)],by="SampleID",all.x=TRUE) %>% dplyr::select(SampleID, Group, Country,variables,PatientID)
res <- pca_plot_corr(filt_colon_genus_tab,
filt_colon_genus_taxa,
filt_colon_metadata,
show_boxplots = FALSE,
variable = "Group", size=3,
segment=segment,
show_legend= FALSE,
clinical = TRUE,
clinical_metadata = metadata_colon_pca,
variables= c("Calprotectin","Nancy_max","eMAYO","MAYO_dai"))
res[[1]]
variable r p p.adj
Calprotectin Calprotectin 0.15183991 0.02211692 0.08846768
Nancy_max Nancy_max 0.02893904 0.76623279 0.76623279
eMAYO eMAYO 0.11425372 0.23903459 0.31871279
MAYO_dai MAYO_dai 0.15876800 0.10075720 0.20151441
[[2]]
variable r p p.adj
Calprotectin Calprotectin -0.036206277 0.5873577 0.9814957
Nancy_max Nancy_max -0.121916388 0.2087739 0.8350957
eMAYO eMAYO 0.007247831 0.9406554 0.9814957
MAYO_dai MAYO_dai 0.002258098 0.9814957 0.9814957
[[3]]
r p p_adjusted
Rothia 0.390506302 7.317003e-11 2.299629e-10
Barnesiella -0.173311204 4.871936e-03 6.304859e-03
Butyricimonas 0.086206373 1.632279e-01 1.795507e-01
Odoribacter -0.156940417 1.087274e-02 1.258949e-02
Alistipes -0.540791408 0.000000e+00 0.000000e+00
Parabacteroides -0.160141748 9.343463e-03 1.141979e-02
Enterococcus 0.557840325 0.000000e+00 0.000000e+00
Coprococcus -0.297947401 9.700752e-07 1.778471e-06
Fusicatenibacter -0.238752134 9.651101e-05 1.415495e-04
Hungatella 0.498204405 0.000000e+00 0.000000e+00
Lachnoclostridium 0.316787297 1.789829e-07 4.375138e-07
Lachnospiraceae_FCS020_group -0.313187531 2.493911e-07 5.486604e-07
Colidextribacter -0.319037397 1.451537e-07 3.991725e-07
Intestinimonas -0.246489288 5.610989e-05 8.817268e-05
Oscillibacter -0.211212640 5.797319e-04 7.971314e-04
Negativibacillus 0.003083252 9.602874e-01 9.602874e-01
Family_XIII_AD3011_group -0.273931153 7.118703e-06 1.204704e-05
Dialister -0.048107039 4.369693e-01 4.577774e-01
Veillonella 0.487572002 0.000000e+00 0.000000e+00
Klebsiella 0.525435178 0.000000e+00 0.000000e+00
Pseudomonas 0.452466206 0.000000e+00 0.000000e+00
Holdemania 0.309448577 3.504242e-07 7.008484e-07
[[4]]
r p p_adjusted
Rothia -0.11694852 5.825392e-02 1.281586e-01
Barnesiella 0.08788719 1.551534e-01 2.438125e-01
Butyricimonas 0.06207930 3.156765e-01 4.629923e-01
Odoribacter 0.13777399 2.552951e-02 6.240547e-02
Alistipes 0.20587665 8.008037e-04 2.936280e-03
Parabacteroides -0.05546489 3.700707e-01 5.088472e-01
Enterococcus 0.10571978 8.705689e-02 1.741138e-01
Coprococcus -0.31736054 1.697072e-07 1.244520e-06
Fusicatenibacter -0.17055712 5.603382e-03 1.540930e-02
Hungatella -0.04184159 4.990343e-01 6.099308e-01
Lachnoclostridium -0.36982398 8.085071e-10 1.134125e-08
Lachnospiraceae_FCS020_group 0.01337197 8.290071e-01 8.290071e-01
Colidextribacter 0.10125124 1.013142e-01 1.837458e-01
Intestinimonas 0.21838051 3.710357e-04 1.632557e-03
Oscillibacter 0.22796075 1.998482e-04 1.099165e-03
Negativibacillus 0.18107540 3.249534e-03 1.021282e-02
Family_XIII_AD3011_group 0.36763588 1.031023e-09 1.134125e-08
Dialister 0.01632857 7.919851e-01 8.290071e-01
Veillonella -0.09916474 1.085771e-01 1.837458e-01
Klebsiella 0.02833834 6.471234e-01 7.493008e-01
Pseudomonas 0.02540813 6.815006e-01 7.496507e-01
Holdemania 0.05081956 4.115365e-01 5.325767e-01
heatmap
df_heatmap <- melt(res[[3]] %>% rownames_to_column("SeqID") %>%
mutate(PCo1=r) %>% select(SeqID,PCo1))Using SeqID as id variables
labels_df <- melt(res[[3]] %>% rownames_to_column("SeqID") %>% select(SeqID,p_adjusted))Using SeqID as id variables
labels <- labels_df
labels$variable <- "PCo1"
labels$value <- NA
labels$value[labels_df$value<=0.001] <- "***"
labels$value[labels_df$value<=0.01] <- "**"
labels$value[labels_df$value<=0.05] <- "*"
p_mdi_colon <- ggplot() +
geom_tile(data=df_heatmap, aes(variable, SeqID, fill= value)) +
theme_minimal() +
scale_fill_gradient2(name = "Rho", low = "blue", mid = "white", high = "red", midpoint = 0,limits = c(-0.7, 0.7)) +
geom_text(data=labels,aes(x=variable,y=SeqID,label=value,vjust=0.6)) +
xlab("") + ylab("")
p_mdi_colonWarning: Removed 3 rows containing missing values or values outside the scale range
(`geom_text()`).
Calprotectin
metadata_colon_pca %<>% dplyr::mutate(log_Calprotectin=log(Calprotectin)) %>% `rownames<-`(NULL)
metadata_colon_pca <- metadata_colon_pca[match(filt_colon_metadata$SampleID, metadata_colon_pca$SampleID), ] %>% `rownames<-`(NULL)
p_2 <- pca_plot_custom(filt_colon_genus_tab,
filt_colon_genus_taxa,
metadata_colon_pca,
show_boxplots = FALSE,
variable = "log_Calprotectin", size=3,
show_legend= FALSE)
p_23.3.1.2 Saving results
write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]],
file = file.path(path,
paste0("beta_diversity_results_", segment,".xlsx")))3.3.2 Supplementary analysis
3.3.2.1 Genus level
level="genus"3.3.2.1.1 Bray-Curtis
PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,
filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
patients = filt_colon_genus_metadata$Patient,
sim.method = "bray", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,
filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
patients = filt_colon_genus_metadata$Patient,
interaction = TRUE, sim.method = "bray", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("bray",level,segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 0.153 | 0.709 | 0.001 | 1 | 1 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 5.847 | 27.123 | 0.043 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 0.167 | 0.774 | 0.001 | 0.991 | 0.991 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
patients = filt_colon_genus_metadata$Patient,
group1 = group1,
group2 = group2,
sim.method = 'bray')
print(result_list)
}
}Plots
p <- pca_plot_custom(filt_colon_genus_tab,
filt_colon_genus_taxa,
filt_colon_genus_metadata,
measure = "bray",
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p
# see the results
p3.3.2.1.2 Jaccard
PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,
filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
patients = filt_colon_genus_metadata$Patient,
sim.method = "jaccard", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,
filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
patients = filt_colon_genus_metadata$Patient,
interaction = TRUE, sim.method = "jaccard", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- rbind(pp_factor,
pp_cov,
pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 0.249 | 0.827 | 0.001 | 1 | 1 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 5.706 | 18.963 | 0.031 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 0.286 | 0.952 | 0.002 | 1 | 1 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
patients = filt_colon_genus_metadata$Patient,
group1 = group1,
group2 = group2,
sim.method = 'jaccard')
print(result_list)
}
}Plots
Custom
p <- pca_plot_custom(filt_colon_genus_tab,
filt_colon_genus_taxa,
filt_colon_genus_metadata,
measure = "jaccard",
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p
# see the results
p3.3.2.2 ASV level
level="ASV"3.3.2.2.1 Aitchison
# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(x=pairwise_df,
filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
sim.method = "robust.aitchison",
p.adjust.m="BH",
patients = filt_colon_metadata$Patient)
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
interaction = TRUE,
sim.method = "robust.aitchison",
p.adjust.m="BH",
patients = filt_colon_metadata$Patient)
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("aitchison",level,segment)]] <- rbind(pp_factor,
pp_cov,
pp_fac.cov)
# see the results
pp_factor pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
1 no_ibd vs ibd 1 249.7248 1.158657 0.001920557 1 1
pp_cov pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
1 no_ibd vs ibd , Country 1 1322.314 6.13519 0.01016952 0.001 0.001
sig
1 ***
pp_fac.cov pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
1 no_ibd vs ibd : Country 1 292.4347 1.357634 0.002249026 1 1
sig
1
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 249.725 | 1.159 | 0.002 | 1 | 1 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 1322.314 | 6.135 | 0.01 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 292.435 | 1.358 | 0.002 | 1 | 1 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
group1 = group1,
group2 = group2,
patients = filt_colon_metadata$Patient)
print(result_list)
}
}PCoA
p <- pca_plot_custom(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata,
show_boxplots = TRUE,
variable = "Group",
size=3,
show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA aitchison",level,segment)]] <- p
# see the results
p3.3.2.2.2 Bray-Curtis
PERMANOVA
# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,
filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
patients = filt_colon_metadata$Patient,
sim.method = "bray", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,
filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
patients = filt_colon_metadata$Patient,
interaction = TRUE, sim.method = "bray", p.adjust.m="BH")
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("bray",level,segment)]] <- rbind(pp_factor,
pp_cov,
pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 0.36 | 1.086 | 0.002 | 1 | 1 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 4.259 | 12.853 | 0.021 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 0.573 | 1.731 | 0.003 | 0.648 | 0.648 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
group1 = group1,
group2 = group2,
patients = filt_colon_metadata$Patient,
sim.method = 'bray')
print(result_list)
}
}PCoA
p <- pca_plot_custom(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata,
measure = "bray",
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p
# see the results
p3.3.2.2.3 Jaccard
PERMANOVA
# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,
filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
patients = filt_colon_metadata$Patient,
sim.method = "jaccard", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,
filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
patients = filt_colon_metadata$Patient,
interaction = TRUE, sim.method = "jaccard", p.adjust.m="BH")
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- rbind(pp_factor,
pp_cov,
pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 0.436 | 1.099 | 0.002 | 1 | 1 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 3.371 | 8.495 | 0.014 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 0.57 | 1.438 | 0.002 | 0.992 | 0.992 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_colon_metadata$Group,
covariate = filt_colon_metadata$Country,
patients = filt_colon_metadata$Patient,
group1 = group1,
group2 = group2,
sim.method = 'jaccard')
print(result_list)
}
}PCoA
p <- pca_plot_custom(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata,
measure = "jaccard",
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=TRUE)
# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p
# see the results
p3.3.2.3 Saving results
write.xlsx(supplements_beta[!grepl("PCoA",names(supplements_beta))],
file = file.path(path,
paste0("supplements_beta_diversity_", segment,".xlsx")))3.4 Univariate Analysis
3.4.1 Main - Genus level
level="genus"# needed paths
path = "../results/Q3/univariate_analysis"
path_maaslin=file.path("../intermediate_files/maaslin/Q3",level)# variables
raw_linda_results_genus[[segment]] <- list()
linda_results_genus[[segment]] <- list()
# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()
# workbook for final df
wb <- createWorkbook()
# PSC effect
psc_effect <- list()3.4.1.1 Genus level
level="genus"Aggregate taxa
genus_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level = level)
colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
colon_genus_asv_taxa_tab <- create_asv_taxa_table(colon_genus_tab,
colon_genus_taxa_tab)3.4.1.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])3.4.1.1.1.1 linda
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group, usage="linDA")Removing 10 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_colon_uni_data,
filt_colon_uni_metadata,
formula = '~ Group * Country + (1|Patient)')0 features are filtered!
The filtered data has 600 samples and 147 features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO")
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")
for (grp in c(group1,group2,group3)){
raw_linda_results_genus[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
linda_results_genus[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
}# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
taxa_table = filt_colon_uni_taxa) +
ggtitle(paste(group,collapse=" vs "))
volcano_2 <- volcano_plot_linda(linda.output, group2,
taxa_table = filt_colon_uni_taxa) +
ggtitle("NO vs CZ")
volcano_3 <- volcano_plot_linda(linda.output, group3,
taxa_table = filt_colon_uni_taxa) +
ggtitle("Interaction effect")
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
# see the plot
volcano3.4.1.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) +
ggtitle(paste(group[1], "vs", group[2]))
volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") +
ggtitle("Country effect")
volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano3.4.1.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_genus,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
venn3.4.1.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
linda.output,
list_intersections,
filt_colon_uni_data,
filt_colon_uni_metadata,
segment=segment,
level=level)
# see the result
## significant interaction effect
list_interaction_significant[[1]]NULL
## results for czech cohort
list_interaction_significant[[2]][1] NA
## results for norwegian cohort
list_interaction_significant[[3]][1] NA
Removing problematic taxa
list_intersections <- removing_interaction_problems(group,
list_interaction_significant,
list_intersections,
segment=segment,
level=level)3.4.1.1.2 Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_genus[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)3.4.1.1.3 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}Dot heatmap
if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
colon_taxa_tab)
dotheatmap_linda
}Horizontal bar plot
if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))
p_prevalence_final <- ggarrange(p_prevalence,
ggplot() + theme_minimal(),
nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}3.4.1.2 Saving results
# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
overwrite = TRUE)
# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
`names<-`(gsub(segment, "", names(
list_intersections[grepl(segment,names(list_intersections))]))),
file.path(path,paste0("significant_taxa_",segment,".xlsx")))3.4.2 Supplementary Analysis
3.4.2.1 ASV level
level="ASV"path_maaslin="../intermediate_files/maaslin/Q3/ASV/"raw_linda_results[[segment]] <- list()
linda_results[[segment]] <- list()
supplements_wb <- createWorkbook()3.4.2.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])linDA
# prepare the data
linda_data <- binomial_prep(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
group, usage="linDA")Removing 7 ASV(s)
Removing 45 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_colon_uni_data,
filt_colon_uni_metadata,
formula = '~ Group * Country + (1|Patient)')0 features are filtered!
The filtered data has 599 samples and 306 features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO")
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")
for (grp in c(group1,group2,group3)){
raw_linda_results[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
linda_results[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
}# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
taxa_table = filt_colon_uni_taxa) +
ggtitle(paste(group,collapse=" vs "))
volcano_2 <- volcano_plot_linda(linda.output, group2,
taxa_table = filt_colon_uni_taxa) +
ggtitle("Country effect")
volcano_3 <- volcano_plot_linda(linda.output, group3,
taxa_table = filt_colon_uni_taxa) +
ggtitle("Interaction effect")
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
# see the plot
volcanoMaAsLin2
Volcano plot
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) +
ggtitle(paste(group[1], "vs", group[2]))
volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") +
ggtitle("Country effect")
volcano <- ggarrange(volcano1,volcano2, ncol=2)
# see the results
volcanoGroup - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
vennInteraction effect
list_interaction_significant <- country_interaction(group,
linda.output,
list_intersections,
filt_colon_uni_data,
filt_colon_uni_metadata,
segment=segment,
level=level)
# see the result
## significant interaction effect
list_interaction_significant[[1]]NULL
## results for czech cohort
list_interaction_significant[[2]][1] NA
## results for norwegian cohort
list_interaction_significant[[3]][1] NA
Removing problematic taxa
list_intersections <- removing_interaction_problems(group,
list_interaction_significant,
list_intersections,
segment=segment,
level=level)Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)3.4.2.1.2 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}Dot heatmap
if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$colon[grepl(level,names(uni_statistics$colon))],
colon_taxa_tab)
dotheatmap_linda
}min_clr -1.225739
max_clr -1.225739
min_log 4.018077
max_log 4.018077
3.4.2.2 Phylum level
level="phylum"path_maaslin="../intermediate_files/maaslin/Q3/Phylum/"raw_linda_results_phylum[[segment]] <- list()
linda_results_phylum[[segment]] <- list()Aggregate taxa
phylum_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level = "Phylum")
colon_phylum_tab <- phylum_data[[1]]
colon_phylum_taxa_tab <- phylum_data[[2]]3.4.2.2.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])linDA
# prepare the data
linda_data <- binomial_prep(colon_phylum_tab,
colon_phylum_taxa_tab,
colon_metadata,
group, usage="linDA")Removing 1 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_colon_uni_data,
filt_colon_uni_metadata,
formula = '~ Group * Country + (1|Patient)')0 features are filtered!
The filtered data has 600 samples and 8 features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO")
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")
for (grp in c(group1,group2,group3)){
raw_linda_results[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
linda_results[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
}# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
taxa_table = filt_colon_uni_taxa) +
ggtitle(paste(group,collapse=" vs "))Using Phylum for naming
volcano_2 <- volcano_plot_linda(linda.output, group2,
taxa_table = filt_colon_uni_taxa) +
ggtitle("Country effect")Using Phylum for naming
volcano_3 <- volcano_plot_linda(linda.output, group3,
taxa_table = filt_colon_uni_taxa) +
ggtitle("Interaction effect")Using Phylum for naming
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
# see the plot
volcanoMaAsLin2
Volcano plot
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) +
ggtitle(paste(group[1], "vs", group[2]))
volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") +
ggtitle("Country effect")
volcano <- ggarrange(volcano1,volcano2, ncol=2)
# see the results
volcanoGroup - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
vennInteraction effect
list_interaction_significant <- country_interaction(group,
linda.output,
list_intersections,
filt_colon_uni_data,
filt_colon_uni_metadata,
segment=segment,
level=level)
# see the result
## significant interaction effect
list_interaction_significant[[1]]NULL
## results for czech cohort
list_interaction_significant[[2]][1] NA
## results for norwegian cohort
list_interaction_significant[[3]][1] NA
Removing problematic taxa
list_intersections <- removing_interaction_problems(group,
list_interaction_significant,
list_intersections,
segment=segment,
level=level)Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)3.4.2.2.2 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}Dot heatmap
if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,uni_statistics$colon[grepl(level,names(uni_statistics$colon))],colon_taxa_tab)
dotheatmap_linda
}3.4.2.3 Saving results
# ALL DATA
saveWorkbook(supplements_wb,file.path(path,paste0("supplements_uni_analysis_wb_",segment,".xlsx")),overwrite = TRUE)
# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
`names<-`(gsub(segment, "", names(
list_intersections[grepl(segment,names(list_intersections))]))),
file.path(path,paste0("supplements_significant_taxa_",segment,".xlsx")))3.5 Results overview
3.5.0.1 Alpha diversity
knitr::kable(pc_observed[[segment]],
digits = 3,
caption = "Results of linear model testing ASV Richness")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | -2.810 | 11.952 | 439.998 | -0.235 | 0.814 | 0.814 | |
| no_ibd vs ibd - CZ vs NO | -10.684 | 15.292 | 332.353 | -0.699 | 0.485 | 0.485 | |
| no_ibd vs Groupibd:CountryNO | -5.680 | 16.562 | 341.720 | -0.343 | 0.732 | 0.732 |
knitr::kable(pc_shannon[[segment]],
digits = 3,
caption = "Results of linear model testing Shannon index")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | 0.033 | 0.158 | 456.049 | 0.211 | 0.833 | 0.833 | |
| no_ibd vs ibd - CZ vs NO | -0.138 | 0.203 | 336.238 | -0.678 | 0.498 | 0.498 | |
| no_ibd vs Groupibd:CountryNO | -0.119 | 0.220 | 346.763 | -0.540 | 0.589 | 0.589 |
knitr::kable(pc_simpson[[segment]],
digits = 3,
caption = "Results of linear model testing Simpson index")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | -0.007 | 0.026 | 453.407 | -0.261 | 0.794 | 0.794 | |
| no_ibd vs ibd - CZ vs NO | -0.025 | 0.033 | 334.532 | -0.744 | 0.457 | 0.457 | |
| no_ibd vs Groupibd:CountryNO | 0.005 | 0.036 | 344.950 | 0.131 | 0.896 | 0.896 |
knitr::kable(pc_pielou[[segment]],
digits = 3,
caption = "Results of linear model testing Pielou index")Table: Results of linear model testing Pielou index
Plots
alpha_div_plots[[paste(segment,"Country")]]3.5.0.2 Beta diversity
Main results
knitr::kable(pairwise_aitchison_raw[[paste("genus", segment)]],
digits = 3,
caption = "Results of PERMANOVA - robust aitchison distance")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 172.580 | 1.118 | 0.002 | 1.000 | 1.000 | |
| no_ibd vs ibd , Country | 1 | 1345.829 | 8.716 | 0.014 | 0.001 | 0.001 | *** |
| no_ibd vs ibd : Country | 1 | 171.139 | 1.109 | 0.002 | 1.000 | 1.000 |
PCA
pca_plots_list[[paste(segment,"genus custom")]]Supplements
knitr::kable(supplements_beta[!grepl("PCoA",names(supplements_beta)) & (grepl("genus",names(supplements_beta)))],
digits = 3,
caption = "Supplementary PERMANOVA results: Bray-curtis, Jaccard distances")
|
|
|
|
PCA
plot_list <- supplements_beta[grepl("PCoA",names(supplements_beta)) &
grepl(segment,names(supplements_beta))]
ggarrange(plotlist = plot_list,
labels=names(plot_list),
font.label = list(size=5,face="plain"),
ncol=2,nrow=3)3.5.0.3 Univariate analysis
Number of significant taxa
knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>%
`colnames<-`("Count") %>%
`rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")| Count | |
|---|---|
| colon genus no_ibd vs ibd | 0 |
| colon ASV no_ibd vs ibd | 1 |
| colon phylum no_ibd vs ibd | 0 |
4 Paper-ready visualizations
4.1 Alpha diversity
p_A <- alpha_div_plots$`terminal_ileum Country` +
ggtitle("Terminal ileum")+
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15))
p_B <- alpha_div_plots$`colon Country` +
ggtitle("Colon") +
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15))
Q3_alpha <- ggarrange(p_A,ggplot() + theme_minimal(),p_B,nrow=1, ncol=3,
widths = c(1,0.1,1))
Q3_alpha4.2 Beta diversity
pca_ti <- pca_plots_list$`terminal_ileum genus custom`
pca_colon <- pca_plots_list$`colon genus custom`
genus_Q3_beta <- ggarrange(pca_ti,
ggplot() + theme_minimal(),
pca_colon,ncol=3,
widths = c(1,0.1,1))
genus_Q3_betaAlpha + Beta diversity
alpha_beta <- ggarrange(Q3_alpha,genus_Q3_beta,
ncol = 1,nrow=2,labels = c("A","B"))
alpha_beta4.3 FIGURE 7
alpha_beta <- ggarrange(alpha_beta,ggplot() + theme_minimal(),ncol=2,
widths = c(1,0.15))
alpha_beta4.4 DAA
if (exists("dot_heatmap_ileum")){
p_ileum <- dot_heatmap_ileum +
ggtitle("Terminal ileum") +
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
legend.position = "none")
}
if (exists("dot_heatmap_colon")){
p_colon <- dot_heatmap_colon +
ggtitle("Colon") +
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
legend.position = "none")
}
if (exists("p_ileum") | exists("p_colon")){
heatmap_plot <- ggarrange(p_ileum,
p_colon,
ncol = 2,labels=c("A","B"))
heatmap_plot
}5 Session info
sessionInfo()R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 26100)
Matrix products: default
locale:
[1] LC_COLLATE=Czech_Czechia.utf8 LC_CTYPE=Czech_Czechia.utf8
[3] LC_MONETARY=Czech_Czechia.utf8 LC_NUMERIC=C
[5] LC_TIME=Czech_Czechia.utf8
time zone: Europe/Prague
tzcode source: internal
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] mice_3.16.0 picante_1.8.2 ape_5.8
[4] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
[7] tidyverse_2.0.0 kableExtra_1.4.0 tidyr_1.3.1
[10] gbm_2.2.2 doParallel_1.0.17 iterators_1.0.14
[13] foreach_1.5.2 ranger_0.17.0 ggvenn_0.1.15
[16] Maaslin2_1.16.0 purrr_1.0.2 pROC_1.18.5
[19] glmnet_4.1-8 MicrobiomeStat_1.2 caret_6.0-94
[22] openxlsx_4.2.6.1 magrittr_2.0.3 emmeans_1.10.7
[25] lmerTest_3.1-3 robustlmm_3.3-1 lme4_1.1-35.5
[28] Matrix_1.6-5 mgcv_1.9-1 nlme_3.1-164
[31] pheatmap_1.0.12 reshape2_1.4.4 vegan_2.6-4
[34] lattice_0.22-6 permute_0.9-7 ggplotify_0.1.2
[37] ggrepel_0.9.5 ggpubr_0.6.0 MicrobiotaProcess_1.14.1
[40] phyloseq_1.46.0 ggplot2_3.5.1 tibble_3.2.1
[43] dplyr_1.1.4 cowplot_1.1.3 readr_2.1.5
[46] igraph_2.0.3 ccrepe_1.38.1 data.table_1.15.4
loaded via a namespace (and not attached):
[1] fs_1.6.4 matrixStats_1.3.0
[3] bitops_1.0-7 RColorBrewer_1.1-3
[5] numDeriv_2016.8-1.1 tools_4.3.1
[7] backports_1.4.1 R6_2.6.1
[9] lazyeval_0.2.2 jomo_2.7-6
[11] rhdf5filters_1.14.1 withr_3.0.2
[13] gridExtra_2.3 cli_3.6.2
[15] Biobase_2.62.0 logging_0.10-108
[17] biglm_0.9-3 sandwich_3.1-1
[19] labeling_0.4.3 mvtnorm_1.2-4
[21] robustbase_0.99-3 pbapply_1.7-2
[23] systemfonts_1.1.0 yulab.utils_0.2.0
[25] svglite_2.1.3 parallelly_1.38.0
[27] rstudioapi_0.17.1 generics_0.1.3
[29] gridGraphics_0.5-1 shape_1.4.6.1
[31] car_3.1-3 zip_2.3.1
[33] biomformat_1.30.0 S4Vectors_0.40.2
[35] abind_1.4-8 infotheo_1.2.0.1
[37] lifecycle_1.0.4 multcomp_1.4-28
[39] yaml_2.3.8 carData_3.0-5
[41] SummarizedExperiment_1.32.0 Rtsne_0.17
[43] rhdf5_2.46.1 recipes_1.1.1
[45] SparseArray_1.2.4 grid_4.3.1
[47] mitml_0.4-5 crayon_1.5.3
[49] pillar_1.10.1 knitr_1.50
[51] optparse_1.7.5 GenomicRanges_1.54.1
[53] statip_0.2.3 boot_1.3-31
[55] estimability_1.5.1 future.apply_1.11.3
[57] codetools_0.2-20 pan_1.9
[59] glue_1.7.0 ggfun_0.1.8
[61] vctrs_0.6.5 treeio_1.26.0
[63] gtable_0.3.6 gower_1.0.1
[65] xfun_0.51 S4Arrays_1.2.1
[67] prodlim_2024.06.25 libcoin_1.0-10
[69] coda_0.19-4.1 pcaPP_2.0-4-1
[71] modeest_2.4.0 survival_3.5-8
[73] timeDate_4041.110 hardhat_1.4.1
[75] lava_1.8.1 statmod_1.5.0
[77] TH.data_1.1-3 ipred_0.9-15
[79] ggtree_3.10.1 GenomeInfoDb_1.38.8
[81] fBasics_4032.96 rpart_4.1.23
[83] colorspace_2.1-0 BiocGenerics_0.48.1
[85] DBI_1.2.3 nnet_7.3-19
[87] ade4_1.7-22 tidyselect_1.2.1
[89] timeSeries_4041.111 compiler_4.3.1
[91] microbiome_1.24.0 xml2_1.3.6
[93] DelayedArray_0.28.0 scales_1.3.0
[95] DEoptimR_1.1-3-1 spatial_7.3-17
[97] digest_0.6.35 minqa_1.2.8
[99] rmarkdown_2.29 XVector_0.42.0
[101] htmltools_0.5.8.1 pkgconfig_2.0.3
[103] MatrixGenerics_1.14.0 stabledist_0.7-2
[105] fastmap_1.2.0 rlang_1.1.3
[107] htmlwidgets_1.6.4 farver_2.1.2
[109] zoo_1.8-12 jsonlite_1.8.8
[111] ModelMetrics_1.2.2.2 RCurl_1.98-1.14
[113] modeltools_0.2-23 Formula_1.2-5
[115] GenomeInfoDbData_1.2.11 patchwork_1.3.0
[117] Rhdf5lib_1.24.2 munsell_0.5.1
[119] Rcpp_1.0.12 ggnewscale_0.5.1
[121] fastGHQuad_1.0.1 stringi_1.8.3
[123] ggstar_1.0.4 stable_1.1.6
[125] zlibbioc_1.48.2 MASS_7.3-60.0.1
[127] plyr_1.8.9 listenv_0.9.1
[129] Biostrings_2.70.3 splines_4.3.1
[131] hash_2.2.6.3 multtest_2.58.0
[133] hms_1.1.3 ggtreeExtra_1.12.0
[135] ggsignif_0.6.4 stats4_4.3.1
[137] rmutil_1.1.10 evaluate_1.0.3
[139] nloptr_2.1.1 tzdb_0.4.0
[141] getopt_1.20.4 future_1.34.0
[143] clue_0.3-65 coin_1.4-3
[145] broom_1.0.7 xtable_1.8-4
[147] tidytree_0.4.6 rstatix_0.7.2
[149] viridisLite_0.4.2 class_7.3-22
[151] aplot_0.2.5 IRanges_2.36.0
[153] cluster_2.1.6 timechange_0.3.0
[155] globals_0.16.3